To run: Beforehand:
module load gcc
In R:
setwd("~/shared-gandalm/brain_CTP/Scripts/integration/pgs/pgs_analysis")
rmarkdown::render("pgs_analysis.Rmd", "html_document")
library(data.table)
## data.table 1.14.2 using 6 threads (see ?getDTthreads). Latest news: r-datatable.com
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(broom)
library(ggplot2)
library(wesanderson)
library(ggrepel)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(DT)
asd_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.ASD_Grove2018_logOR.sbayesr.sscore"
azd_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.AZD_Marioni2018.sbayesr.sscore"
scz_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.SCZ_Pardinas2018.sbayesr.sscore"
#scz_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.SCZ_Pardinas2018_v2.sbayesr.sscore"
height_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.Height_Yengo2018.sbayesr.sscore"
mdd_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.MDD_Howard2019.sbayesr.sscore"
eduyears_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.EduYears_Lee2018.sbayesr.sscore"
chronotype_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.Chronotype_Jones2019.sbayesr.sscore"
iq_pgs_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/scores/ROSMAP_ASDbrain_LIBD_pgs.IQ_Savage2018.sbayesr.sscore"
pheno_dir <- "~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/ctp_clr1e-3_aggregated_scz_asd_azd.txt"
out_dir <- "~/shared-gandalm/brain_CTP/Data/integration/pgs/analysis"
asd_pgs <- read.delim(asd_pgs_dir, header = T, as.is = T)
azd_pgs <- read.delim(azd_pgs_dir, header = T, as.is = T)
scz_pgs <- read.delim(scz_pgs_dir, header = T, as.is = T)
height_pgs <- read.delim(height_pgs_dir, header = T, as.is = T)
mdd_pgs <- read.delim(mdd_pgs_dir, header = T, as.is = T)
eduyears_pgs <- read.delim(eduyears_pgs_dir, header = T, as.is = T)
#chronotype_pgs <- read.delim(chronotype_pgs_dir, header = T, as.is = T)
#iq_pgs <- read.delim(iq_pgs_dir, header = T, as.is = T)
pgs_trait_all <- TRUE
# Change depending on analysis
pgs_name <- c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "Height_PGS")
pgs_name <- c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "EduYears_PGS", "MDD_PGS", "Height_PGS")
#pgs_name <- c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "Height_PGS", "Chronotype_PGS", "IQ_PGS")
pheno <- read.delim(pheno_dir, header = T, as.is = T)
pheno$age2 <- pheno$age^2
asd_exclude <- read.delim("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/asd_exclude_studybalance.id", header = F, as.is = T)
scz_exclude <- read.delim("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/scz_exclude_studybalance.id", header = F, as.is = T)
asd_pgs <- asd_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
azd_pgs <- azd_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
scz_pgs <- scz_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
height_pgs <- height_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
mdd_pgs <- mdd_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
eduyears_pgs <- eduyears_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
#chronotype_pgs <- chronotype_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
#iq_pgs <- iq_pgs %>% dplyr::select(c("IID", "SCORE1_AVG"))
colnames(asd_pgs)[which(colnames(asd_pgs) == "SCORE1_AVG")] <- "asd_pgs_raw"
colnames(azd_pgs)[which(colnames(azd_pgs) == "SCORE1_AVG")] <- "azd_pgs_raw"
colnames(scz_pgs)[which(colnames(scz_pgs) == "SCORE1_AVG")] <- "scz_pgs_raw"
colnames(height_pgs)[which(colnames(height_pgs) == "SCORE1_AVG")] <- "height_pgs_raw"
colnames(mdd_pgs)[which(colnames(mdd_pgs) == "SCORE1_AVG")] <- "mdd_pgs_raw"
colnames(eduyears_pgs)[which(colnames(eduyears_pgs) == "SCORE1_AVG")] <- "eduyears_pgs_raw"
#colnames(chronotype_pgs)[which(colnames(chronotype_pgs) == "SCORE1_AVG")] <- "chronotype_pgs_raw"
#colnames(iq_pgs)[which(colnames(iq_pgs) == "SCORE1_AVG")] <- "iq_pgs_raw"
pgs <- inner_join(asd_pgs, azd_pgs, by = "IID")
pgs <- inner_join(pgs, scz_pgs, by = "IID")
pgs <- inner_join(pgs, height_pgs, by = "IID")
if (pgs_trait_all == TRUE) {
pgs <- inner_join(pgs, mdd_pgs, by = "IID")
pgs <- inner_join(pgs, eduyears_pgs, by = "IID")
#pgs <- inner_join(pgs, chronotype_pgs, by = "IID")
#pgs <- inner_join(pgs, iq_pgs, by = "IID")
}
colnames(pgs)[which(colnames(pgs) == "IID")] <- "genoid"
# becuase of temporary duplicated merged bfile
pgs <- pgs[!duplicated(pgs),]
rosmap_id <- read.delim("~/shared-gandalm/GenomicDatasets/ROSMAP/ROSMAP_metadata/ROSMAP_wgs_id_fordatalinkage.txt", header = T, as.is = T)
rosmap_genoid <- rosmap_id[,c("individualID", "specimenID")]
colnames(rosmap_genoid) <- c("IID", "genoid")
libd_id <- read.delim("~/shared-gandalm/GenomicDatasets/LIBD_phase2/methyl/Jaffe_methylation_DLPFC/pheno/GSE74193_series_matrix_pheno_keep_df.txt", header = T, as.is = T)
libd_genoid <- libd_id[,c("ID", "brnum")]
colnames(libd_genoid) <- c("IID", "genoid")
asdbrain_id <- read.csv("~/shared-gandalm/brain_CTP/Data/pheno/ASD_methylation_brain/Wong_ASD_Brain_Methylation_SuppTable1_pheno.csv")
asdbrain_genoid <- asdbrain_id[,c("SampleName", "BrainID")]
colnames(asdbrain_genoid) <- c("IID", "genoid")
# Combine together and save
genoid <- rbind(rosmap_genoid, libd_genoid, asdbrain_genoid)
write.table(genoid, paste(out_dir, "/ROSMAP_LIBD_ASDbrain_genoid_IID.id", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
# Write table with genoid overlapping with pheno
genoid_ctp <- genoid[which(genoid$IID %in% pheno$IID),]
write.table(genoid_ctp[,c(2,2,1)], paste("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_LIBD_ASDbrain_genoid_IID_brainCTP.id", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
pgs_genoid <- inner_join(genoid, pgs, by = "genoid")
pgs_pheno <- inner_join(pheno, pgs_genoid)
## Joining, by = "IID"
rosmap_pop <- read.delim("~/shared-gandalm/GenomicDatasets/ROSMAP/ROSMAP_WGS/qc_check/pc_and_population.txt", header = T, as.is = T)
libd_pop <- read.delim("~/shared-gandalm/brain_CTP/Data/genotyping/Jaffe2018/merge_arrays/pc_and_population_Illumina_1M_Illumina_h650.txt", header = T, as.is = T)
asdbrain_pop <- read.delim("~/shared-gandalm/GenomicDatasets/PsychENCODE/Genotypes/IndividualStudies/UCLA_ASD_synapse/grm/pc_and_population.txt", header = T, as.is = T)
asdbrain_pop$IID <- gsub("/", "_", asdbrain_pop$IID)
pop <- rbind(rosmap_pop, libd_pop, asdbrain_pop)
pop_ctp <- pop %>% filter(IID %in% pgs_pheno$genoid)
pop_eur <- pop %>% filter(population == "EUR") %>% mutate(genoid = IID)
pop_ctp_eur <- pop %>% filter(IID %in% pop_ctp$IID) %>% filter(IID %in% pop_eur$IID)
write.table(pop, "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/pc_and_population_ROSMAP_LIBD_ASDbrain.txt", col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pop_eur, "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/pc_and_population_ROSMAP_LIBD_ASDbrain_EUR.txt", col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pop_ctp_eur, "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/pc_and_population_ROSMAP_LIBD_ASDbrain_brainCTP_EUR.txt", col.names = T, row.names = F, quote = F, sep = "\t")
pgs_pheno_eur0 <- pgs_pheno %>% filter(genoid %in% pop_eur$genoid)
Write tables
ctp_pc <- readRDS("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD.rds")
tmp <- inner_join(pheno, ctp_pc[,c(1, grep("Comp", colnames(ctp_pc)))], by = "IID")
pgs_pheno_ctp_pc <- inner_join(tmp, pgs_genoid, by = "IID")
pgs_pheno_ctp_pc_pop <- inner_join(pgs_pheno_ctp_pc, data.frame(genoid = pop_ctp$IID, population = pop_ctp$population), by = "genoid")
write.table(pgs_pheno_ctp_pc_pop, paste(out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_raw_allancestry.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
Counts of ancestral groups
pop %>% group_by(str, population) %>% dplyr::summarise(n=n()) %>% pivot_wider(names_from = population, values_from = n) %>% mutate(total = sum(AFR, EUR, other, EAS, SAS, na.rm = T))
## `summarise()` has grouped output by 'str'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 7
## # Groups: str [4]
## str AFR EUR other SAS EAS total
## <chr> <int> <int> <int> <int> <int> <int>
## 1 LIBD_Illumina_1M 152 157 20 NA NA 329
## 2 LIBD_Illumina_h650 64 63 5 1 NA 133
## 3 ROSMAP_WGS 1 1132 2 NA NA 1135
## 4 UCLA_ASD 8 88 5 2 2 105
pop_ctp %>% group_by(str, population) %>% dplyr::summarise(n=n()) %>% pivot_wider(names_from = population, values_from = n) %>% mutate(total = sum(AFR, EUR, other, EAS, SAS, na.rm = T))
## `summarise()` has grouped output by 'str'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 7
## # Groups: str [4]
## str AFR EUR other SAS EAS total
## <chr> <int> <int> <int> <int> <int> <int>
## 1 LIBD_Illumina_1M 122 152 19 NA NA 293
## 2 LIBD_Illumina_h650 57 61 5 1 NA 124
## 3 ROSMAP_WGS NA 633 NA NA NA 633
## 4 UCLA_ASD 2 44 4 2 1 53
pop_ctp_eur %>% group_by(str, population) %>% dplyr::summarise(n=n())
## `summarise()` has grouped output by 'str'. You can override using the `.groups`
## argument.
## # A tibble: 4 × 3
## # Groups: str [4]
## str population n
## <chr> <chr> <int>
## 1 LIBD_Illumina_1M EUR 152
## 2 LIBD_Illumina_h650 EUR 61
## 3 ROSMAP_WGS EUR 633
## 4 UCLA_ASD EUR 44
genopc_eur <- read.table("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_imputed_MERGE_1_22_QC_EUR_brainCTP_ldprune_pca20.eigenvec", header = F, as.is = T)
genopc123_eur <- genopc_eur[,2:5]
colnames(genopc123_eur) <- c("genoid", "PC1", "PC2", "PC3")
pgs_pheno_eur <- inner_join(pgs_pheno_eur0, genopc123_eur, by = "genoid")
# Check ancestry clustering
ggplot(pgs_pheno_eur, aes(x = PC1, y = PC2, colour = study, alpha = 0.2)) + geom_point() + scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")
ggplot(pgs_pheno_eur, aes(x = PC1, y = PC3, colour = study, alpha = 0.2)) + geom_point() + scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")
ggplot(pgs_pheno_eur, aes(x = PC2, y = PC3, colour = study, alpha = 0.2)) + geom_point() + scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")
# Check outliers
# print("Outliers based on SD of PC > 4")
# pc1_sd <- sd(pgs_pheno_eur$PC1)
# pc2_sd <- sd(pgs_pheno_eur$PC2)
# pc3_sd <- sd(pgs_pheno_eur$PC3)
#
# pgs_pheno_eur %>% filter(PC1/pc1_sd > 4) %>% datatable()
# pgs_pheno_eur %>% filter(PC2/pc2_sd > 4) %>% datatable()
# pgs_pheno_eur %>% filter(PC3/pc3_sd > 4) %>% datatable()
#
# eur_outliers <- pgs_pheno_eur %>% filter((PC1/pc1_sd > 4) | (PC2/pc2_sd > 4) | (PC3/pc3_sd > 4))
# write.table(eur_outliers[,c("genoid", "genoid")], "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_EUR_outliers_PC123sd4.id", col.names = T, row.names = F, quote = F, sep = "\t")
# Remove ROSMAP outliers based on PC2
# rosmap2_outliers <- pgs_pheno_eur %>% filter(PC3 < -0.5)
# write.table(rosmap2_outliers[,c("genoid", "genoid")], "~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_EUR_outliers_PC123_PC3ROSMAP2.id", col.names = F, row.names = F, quote = F, sep = "\t")
# Exclude the following specimenID
# SM-CTEIJ
# SM-CTEMN
# SM-CTEI8
# SM-CTEN3
# SM-CTED9
# SM-CTEE2
# The below PCA still includes relatives. It shows taht keeping relatives means that the PCs capture family structure rather than population structure
# genopc_eur <- read.table("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_imputed_MERGE_1_22_QC_EUR_brainCTP_ldprune_exclROSMAPoutlier_pca20.eigenvec", header = F, as.is = T)
# This PCA removes all relatives (rel < 0.05)
genopc_eur <- read.table("~/shared-gandalm/brain_CTP/Data/genotyping/megaanalysis/ROSMAP_ASDbrain_LIBD_imputed_MERGE_1_22_QC_EUR_brainCTP_rel0.05_ldprune_pca20.eigenvec", header = F, as.is = T)
genopc123_eur <- genopc_eur[,2:5]
colnames(genopc123_eur) <- c("genoid", "PC1", "PC2", "PC3")
pgs_pheno_eur <- inner_join(pgs_pheno_eur0, genopc123_eur, by = "genoid")
# Check ancestry clustering
ggplot(pgs_pheno_eur, aes(x = PC1, y = PC2, colour = study, alpha = 0.2)) + geom_point() + scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")
ggplot(pgs_pheno_eur, aes(x = PC1, y = PC3, colour = study, alpha = 0.2)) + geom_point() + scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")
ggplot(pgs_pheno_eur, aes(x = PC2, y = PC3, colour = study, alpha = 0.2)) + geom_point() + scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "")
asd_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(asd_pgs_raw) %>% mean()
asd_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(asd_pgs_raw) %>% sd()
azd_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(azd_pgs_raw) %>% mean()
azd_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(azd_pgs_raw) %>% sd()
scz_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(scz_pgs_raw) %>% mean()
scz_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(scz_pgs_raw) %>% sd()
height_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(height_pgs_raw) %>% mean()
height_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(height_pgs_raw) %>% sd()
pgs_pheno_eur <- pgs_pheno_eur %>%
mutate(ASD_PGS = (asd_pgs_raw - asd_pgs_ctl_mean)/asd_pgs_ctl_sd) %>%
mutate(AZD_PGS = (azd_pgs_raw - azd_pgs_ctl_mean)/azd_pgs_ctl_sd) %>%
mutate(SCZ_PGS = (scz_pgs_raw - scz_pgs_ctl_mean)/scz_pgs_ctl_sd) %>%
mutate(Height_PGS = (height_pgs_raw - height_pgs_ctl_mean)/height_pgs_ctl_sd)
if (pgs_trait_all == TRUE) {
mdd_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(mdd_pgs_raw) %>% mean()
mdd_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(mdd_pgs_raw) %>% sd()
eduyears_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(eduyears_pgs_raw) %>% mean()
eduyears_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(eduyears_pgs_raw) %>% sd()
# chronotype_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(chronotype_pgs_raw) %>% mean()
# chronotype_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(chronotype_pgs_raw) %>% sd()
# iq_pgs_ctl_mean <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(iq_pgs_raw) %>% mean()
# iq_pgs_ctl_sd <- pgs_pheno_eur %>% filter(dx == "CTL") %>% pull(iq_pgs_raw) %>% sd()
pgs_pheno_eur <- pgs_pheno_eur %>%
mutate(MDD_PGS = (mdd_pgs_raw - mdd_pgs_ctl_mean)/mdd_pgs_ctl_sd) %>%
mutate(EduYears_PGS = (eduyears_pgs_raw - eduyears_pgs_ctl_mean)/eduyears_pgs_ctl_sd)
# mutate(Chronotype_PGS = (chronotype_pgs_raw - chronotype_pgs_ctl_mean)/chronotype_pgs_ctl_sd) %>%
# mutate(IQ_PGS = (iq_pgs_raw - iq_pgs_ctl_mean)/iq_pgs_ctl_sd)
}
MatchDx <- match(pgs_pheno_eur$dx, c("CTL", "ASD", "SCZ", "AZD"))
pgs_pheno_eur$dx <- fct_reorder(pgs_pheno_eur$dx, MatchDx)
write.table(pgs_pheno_eur, paste(out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.txt", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
pgs_pheno_eur %>% group_by(study, dx) %>% dplyr::summarise(n=n())
pgs_pheno_eur %>% group_by(study, dx) %>% filter(!is.na(age) | !is.na(sex)) %>% dplyr::summarise(n=n())
pheno_out_dir <- "~/shared-gandalm/brain_CTP/Data/integration/gcta/pheno/ROSMAP_LIBD_ASDbrain"
# Try and see if removing low clr-transformed values improves h2
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Exc >= -2), c("genoid", "genoid", "Exc")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Exc.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Inh >= -2), c("genoid", "genoid", "Inh")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Inh.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Astro >= -2), c("genoid", "genoid", "Astro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Astro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Endo >= -2), c("genoid", "genoid", "Endo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Endo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Micro >= -2), c("genoid", "genoid", "Micro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Micro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$Oligo >= -2), c("genoid", "genoid", "Oligo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_Oligo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[which(pgs_pheno_eur$OPC >= -2), c("genoid", "genoid", "OPC")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_neg2_OPC.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
# clr-transformed data
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Exc")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Exc.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Inh")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Inh.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Astro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Astro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Endo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Endo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Micro")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Micro.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "Oligo")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_Oligo.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "OPC")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.hseq_clr1e3_OPC.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "age", "PC1", "PC2", "PC3")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.age_genoPC123.qcov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "age", "age2", "PC1", "PC2", "PC3")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.age_age2_genoPC123.qcov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "sex", "batch", "dx")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.sex_batch_dx.cov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(pgs_pheno_eur[,c("genoid", "genoid", "sex", "batch")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.sex_batch.cov", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
# Write CTP PCs
ctp_pc <- readRDS("~/shared-gandalm/brain_CTP/Data/integration/ctp_differences/ctp_noclr_pc_ROSMAP_ASDbrain_LIBD.rds")
ctp_pc_genoid <- inner_join(genoid, ctp_pc, by = "IID")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.1")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc1.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.2")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc2.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.3")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc3.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.4")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc4.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.5")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc5.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
write.table(ctp_pc_genoid[,c("genoid", "genoid", "Comp.6")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.ctp_pc6.pheno", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
# CTL IDs
write.table(pgs_pheno_eur[which(pgs_pheno_eur$dx == "CTL"),c("genoid", "genoid")], paste(pheno_out_dir, "/ROSMAP_LIBD_ASDbrain_pheno_pgs_EUR.CTL.id", sep = ""), col.names = T, row.names = F, quote = F, sep = "\t")
pgs_pheno_eur.long <- melt(pgs_pheno_eur, measure.vars = c(all_of(pgs_name)), variable.name = "Trait", value.name = "PGS")
## Warning in melt(pgs_pheno_eur, measure.vars = c(all_of(pgs_name)),
## variable.name = "Trait", : The melt generic in data.table has been passed
## a data.frame and will attempt to redirect to the relevant reshape2 method;
## please note that reshape2 is deprecated, and this redirection is now
## deprecated as well. To continue using melt methods from reshape2 while both
## libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(pgs_pheno_eur). In the next version, this warning will become an
## error.
mean.df <- pgs_pheno_eur.long %>% group_by(study, dx, Trait) %>% dplyr::summarise(mean = mean(PGS))
## `summarise()` has grouped output by 'study', 'dx'. You can override using the
## `.groups` argument.
matchDx <- match(mean.df$dx, c("CTL", "ASD", "SCZ", "AZD"))
pgs_pheno_eur.long %>%
mutate(MatchDx = match(dx, c("CTL", "ASD", "SCZ", "AZD"))) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
mutate(dx = fct_reorder(dx, MatchDx)) %>%
#mutate(MatchTrait = match(Trait, all_of(pgs_name))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = PGS, fill = dx)) +
geom_density(alpha = 0.5) +
geom_vline(data=mean.df, aes(xintercept=mean, colour=dx), linetype="dashed", size=0.5) +
facet_grid(Trait ~ study) +
scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
theme_bw()
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution.svg", sep = ""), height = 10, width = 7)
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution.png", sep = ""), height = 10, width = 7)
mean.df2 <- pgs_pheno_eur.long %>% group_by(study, dx, Trait) %>% dplyr::summarise(mean = mean(PGS)) %>% filter((study == "ASDbrain" & Trait == "ASD_PGS") | (study == "LIBD" & Trait == "SCZ_PGS") | (study == "ROSMAP" & Trait == "AZD_PGS"))
## `summarise()` has grouped output by 'study', 'dx'. You can override using the
## `.groups` argument.
# Diagnosis-specific plot only
pgs_pheno_eur.long %>% filter((study == "ASDbrain" & Trait == "ASD_PGS") | (study == "LIBD" & Trait == "SCZ_PGS") | (study == "ROSMAP" & Trait == "AZD_PGS")) %>%
mutate(MatchDx = match(dx, c("CTL", "ASD", "SCZ", "AZD"))) %>%
mutate(dx = fct_reorder(dx, MatchDx)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = PGS, fill = dx)) +
geom_density(alpha = 0.5) +
geom_vline(data=mean.df2, aes(xintercept=mean, colour=dx), linetype="dashed", size=0.5) +
facet_wrap( ~ study, ncol = 2) +
scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
theme_bw()
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx.svg", sep = ""), height = 4, width = 4)
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx.png", sep = ""), height = 4, width = 4)
pgs_pheno_eur.long %>% filter((study == "ASDbrain" & Trait == "ASD_PGS") | (study == "LIBD" & Trait == "SCZ_PGS") | (study == "ROSMAP" & Trait == "AZD_PGS")) %>%
mutate(MatchDx = match(dx, c("CTL", "ASD", "SCZ", "AZD"))) %>%
mutate(dx = fct_reorder(dx, MatchDx)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = PGS, fill = dx)) +
geom_density(alpha = 0.5) +
geom_vline(data=mean.df2, aes(xintercept=mean, colour=dx), linetype="dashed", size=0.5) +
facet_wrap( ~ study, nrow = 1) +
scale_fill_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous"), name = "") +
theme_bw() + theme(legend.position = "bottom")
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx_1row.svg", sep = ""), height = 3, width = 10)
ggsave(paste(out_dir, "/pgs_EUR_studyxdx_distribution_pgsdx_1row.png", sep = ""), height = 3, width = 10)
For target trait
summary(lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.70680 -0.70067 0.03128 0.44351 1.59303
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.19978 0.20554 0.972 0.337
## dxASD -0.03142 0.26239 -0.120 0.905
##
## Residual standard error: 0.8475 on 42 degrees of freedom
## Multiple R-squared: 0.0003412, Adjusted R-squared: -0.02346
## F-statistic: 0.01434 on 1 and 42 DF, p-value: 0.9053
summary(lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3485 -0.6288 -0.0191 0.6558 3.0219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.21371 0.09572 2.233 0.0266 *
## dxSCZ 0.73259 0.14822 4.943 1.58e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.062 on 209 degrees of freedom
## Multiple R-squared: 0.1047, Adjusted R-squared: 0.1004
## F-statistic: 24.43 on 1 and 209 DF, p-value: 1.578e-06
summary(lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4817 -0.7085 -0.1000 0.6425 3.9801
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10067 0.05501 -1.830 0.0677 .
## dxAZD 0.66672 0.08532 7.814 2.37e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.05 on 621 degrees of freedom
## Multiple R-squared: 0.08953, Adjusted R-squared: 0.08806
## F-statistic: 61.06 on 1 and 621 DF, p-value: 2.37e-14
Height as a negative control
summary(lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.30563 -0.59799 0.02801 0.60879 2.03531
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04286 0.21901 -0.196 0.846
## dxASD -0.42674 0.27959 -1.526 0.134
##
## Residual standard error: 0.903 on 42 degrees of freedom
## Multiple R-squared: 0.05255, Adjusted R-squared: 0.02999
## F-statistic: 2.33 on 1 and 42 DF, p-value: 0.1344
summary(lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.62773 -0.61812 0.01064 0.72509 2.46164
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.30750 0.08563 -3.591 0.000411 ***
## dxSCZ 0.28937 0.13260 2.182 0.030205 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9497 on 209 degrees of freedom
## Multiple R-squared: 0.02228, Adjusted R-squared: 0.0176
## F-statistic: 4.762 on 1 and 209 DF, p-value: 0.0302
summary(lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6328 -0.5774 0.0599 0.7034 2.7155
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10591 0.04882 2.170 0.0304 *
## dxAZD -0.04362 0.07571 -0.576 0.5647
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9313 on 621 degrees of freedom
## Multiple R-squared: 0.0005343, Adjusted R-squared: -0.001075
## F-statistic: 0.332 on 1 and 621 DF, p-value: 0.5647
Other neuropsych traits
summary(lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.64764 -0.38676 -0.06134 0.38921 1.68266
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2321 0.1807 1.284 0.206
## dxASD -0.2572 0.2307 -1.115 0.271
##
## Residual standard error: 0.7452 on 42 degrees of freedom
## Multiple R-squared: 0.02874, Adjusted R-squared: 0.005614
## F-statistic: 1.243 on 1 and 42 DF, p-value: 0.2713
summary(lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7104 -0.6581 -0.0124 0.7187 2.5275
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.14183 0.08848 1.603 0.11
## dxSCZ 0.16867 0.13701 1.231 0.22
##
## Residual standard error: 0.9813 on 209 degrees of freedom
## Multiple R-squared: 0.007199, Adjusted R-squared: 0.002449
## F-statistic: 1.516 on 1 and 209 DF, p-value: 0.2197
summary(lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3406 -0.6688 0.0165 0.6718 2.7367
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05877 0.05321 -1.104 0.270
## dxAZD 0.04199 0.08252 0.509 0.611
##
## Residual standard error: 1.015 on 621 degrees of freedom
## Multiple R-squared: 0.0004167, Adjusted R-squared: -0.001193
## F-statistic: 0.2589 on 1 and 621 DF, p-value: 0.6111
summary(lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.83154 -0.48557 0.03916 0.47274 1.99466
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.4965 0.1804 -2.752 0.00872 **
## dxASD 0.6353 0.2304 2.758 0.00858 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.744 on 42 degrees of freedom
## Multiple R-squared: 0.1533, Adjusted R-squared: 0.1332
## F-statistic: 7.605 on 1 and 42 DF, p-value: 0.008578
summary(lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2447 -0.6284 -0.0321 0.6852 3.2895
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.38183 0.09529 -4.007 8.56e-05 ***
## dxSCZ 0.31194 0.14756 2.114 0.0357 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.057 on 209 degrees of freedom
## Multiple R-squared: 0.02094, Adjusted R-squared: 0.01625
## F-statistic: 4.469 on 1 and 209 DF, p-value: 0.0357
summary(lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study ==
## "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8826 -0.6633 -0.0003 0.6951 2.6612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1522 0.0501 3.038 0.00248 **
## dxAZD 0.1034 0.0777 1.331 0.18362
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9558 on 621 degrees of freedom
## Multiple R-squared: 0.002845, Adjusted R-squared: 0.00124
## F-statistic: 1.772 on 1 and 621 DF, p-value: 0.1836
For target trait + genoPC covariates
summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.40854 -0.57909 0.02969 0.52221 1.58669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1899 0.2114 0.898 0.375
## dxASD 0.0173 0.2648 0.065 0.948
## PC1 1.8109 2.8351 0.639 0.527
## PC2 0.6257 7.3194 0.085 0.932
## PC3 7.7982 5.8407 1.335 0.190
##
## Residual standard error: 0.8332 on 39 degrees of freedom
## Multiple R-squared: 0.1027, Adjusted R-squared: 0.01067
## F-statistic: 1.116 on 4 and 39 DF, p-value: 0.3629
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2426 -0.6028 0.0017 0.6458 2.5156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2266 0.1023 2.215 0.0278 *
## dxSCZ 0.7560 0.1451 5.211 4.55e-07 ***
## PC1 4.9724 2.1753 2.286 0.0233 *
## PC2 -0.2507 3.1578 -0.079 0.9368
## PC3 5.1390 3.0862 1.665 0.0974 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.037 on 206 degrees of freedom
## Multiple R-squared: 0.1585, Adjusted R-squared: 0.1422
## F-statistic: 9.703 on 4 and 206 DF, p-value: 3.289e-07
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4298 -0.7420 -0.0958 0.6509 4.0119
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10490 0.05526 -1.898 0.0581 .
## dxAZD 0.67210 0.08559 7.853 1.8e-14 ***
## PC1 -1.25063 1.44256 -0.867 0.3863
## PC2 1.45096 1.14018 1.273 0.2036
## PC3 -1.32891 1.18778 -1.119 0.2637
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.049 on 618 degrees of freedom
## Multiple R-squared: 0.0944, Adjusted R-squared: 0.08853
## F-statistic: 16.1 on 4 and 618 DF, p-value: 1.491e-12
Height as a negative control + covariates
summary(lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.51454 -0.45931 -0.03165 0.55819 1.78388
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08575 0.19458 0.441 0.66186
## dxASD -0.52923 0.24380 -2.171 0.03610 *
## PC1 -9.20878 2.61013 -3.528 0.00109 **
## PC2 -11.45093 6.73856 -1.699 0.09722 .
## PC3 -2.52388 5.37719 -0.469 0.64142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7671 on 39 degrees of freedom
## Multiple R-squared: 0.3651, Adjusted R-squared: 0.3
## F-statistic: 5.608 on 4 and 39 DF, p-value: 0.001153
summary(lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.47283 -0.58857 -0.00698 0.64378 2.41847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.23945 0.08919 -2.685 0.00785 **
## dxSCZ 0.26948 0.12652 2.130 0.03436 *
## PC1 -8.69344 1.89708 -4.583 7.95e-06 ***
## PC2 -0.89584 2.75385 -0.325 0.74528
## PC3 0.37665 2.69144 0.140 0.88884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.904 on 206 degrees of freedom
## Multiple R-squared: 0.1269, Adjusted R-squared: 0.1099
## F-statistic: 7.484 on 4 and 206 DF, p-value: 1.198e-05
summary(lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.38146 -0.58125 0.03068 0.59682 2.39668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07089 0.04330 1.637 0.10213
## dxAZD -0.05106 0.06707 -0.761 0.44676
## PC1 -14.35108 1.13048 -12.695 < 2e-16 ***
## PC2 2.69660 0.89351 3.018 0.00265 **
## PC3 -4.74250 0.93081 -5.095 4.64e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8223 on 618 degrees of freedom
## Multiple R-squared: 0.2247, Adjusted R-squared: 0.2197
## F-statistic: 44.77 on 4 and 618 DF, p-value: < 2.2e-16
Check other trait-PGS combinations
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.94717 -0.44216 0.00504 0.37978 1.69613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1297 0.1880 -0.690 0.49437
## dxASD 0.4012 0.2355 1.703 0.09644 .
## PC1 7.0216 2.5214 2.785 0.00822 **
## PC2 2.5921 6.5094 0.398 0.69265
## PC3 2.4814 5.1944 0.478 0.63552
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.741 on 39 degrees of freedom
## Multiple R-squared: 0.2993, Adjusted R-squared: 0.2274
## F-statistic: 4.164 on 4 and 39 DF, p-value: 0.006652
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.90427 -0.61964 -0.05869 0.35414 2.05747
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1270 0.2243 0.566 0.574
## dxASD 0.1972 0.2810 0.702 0.487
## PC1 3.9416 3.0082 1.310 0.198
## PC2 9.3476 7.7661 1.204 0.236
## PC3 1.1508 6.1972 0.186 0.854
##
## Residual standard error: 0.8841 on 39 degrees of freedom
## Multiple R-squared: 0.07464, Adjusted R-squared: -0.02026
## F-statistic: 0.7865 on 4 and 39 DF, p-value: 0.541
summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.92865 -0.61006 -0.04222 0.75846 2.48111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1349 0.1035 1.303 0.1939
## dxSCZ -0.2050 0.1469 -1.396 0.1642
## PC1 5.1967 2.2023 2.360 0.0192 *
## PC2 -1.5921 3.1969 -0.498 0.6190
## PC3 2.2101 3.1244 0.707 0.4801
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.049 on 206 degrees of freedom
## Multiple R-squared: 0.05482, Adjusted R-squared: 0.03647
## F-statistic: 2.987 on 4 and 206 DF, p-value: 0.01998
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.08422 -0.80958 -0.03644 0.61038 2.87367
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2938 0.1033 2.845 0.00489 **
## dxSCZ -0.1357 0.1465 -0.927 0.35521
## PC1 0.3239 2.1965 0.147 0.88292
## PC2 1.3772 3.1885 0.432 0.66624
## PC3 1.2873 3.1163 0.413 0.67996
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.047 on 206 degrees of freedom
## Multiple R-squared: 0.006619, Adjusted R-squared: -0.01267
## F-statistic: 0.3432 on 4 and 206 DF, p-value: 0.8486
summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7990 -0.6490 -0.0029 0.6411 2.5700
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05740 0.05159 -1.113 0.266319
## dxAZD -0.05185 0.07991 -0.649 0.516712
## PC1 4.85087 1.34693 3.601 0.000342 ***
## PC2 1.52963 1.06459 1.437 0.151274
## PC3 1.15893 1.10904 1.045 0.296436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9797 on 618 degrees of freedom
## Multiple R-squared: 0.027, Adjusted R-squared: 0.02071
## F-statistic: 4.288 on 4 and 618 DF, p-value: 0.001981
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.59015 -0.61562 -0.01351 0.63657 2.65252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06766 0.05034 -1.344 0.179473
## dxAZD 0.03916 0.07797 0.502 0.615701
## PC1 4.44107 1.31425 3.379 0.000773 ***
## PC2 -2.44845 1.03876 -2.357 0.018729 *
## PC3 6.81406 1.08212 6.297 5.76e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.956 on 618 degrees of freedom
## Multiple R-squared: 0.07866, Adjusted R-squared: 0.07269
## F-statistic: 13.19 on 4 and 618 DF, p-value: 2.568e-10
Other neuropsych traits
summary(lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.51991 -0.48935 -0.01293 0.41645 1.72214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.23680 0.19429 1.219 0.230
## dxASD -0.24577 0.24343 -1.010 0.319
## PC1 0.07066 2.60622 0.027 0.979
## PC2 -1.15552 6.72847 -0.172 0.865
## PC3 3.55877 5.36913 0.663 0.511
##
## Residual standard error: 0.7659 on 39 degrees of freedom
## Multiple R-squared: 0.04717, Adjusted R-squared: -0.05056
## F-statistic: 0.4827 on 4 and 39 DF, p-value: 0.7483
summary(lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.54716 -0.59004 -0.05149 0.64961 2.46207
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.08548 0.09613 0.889 0.3749
## dxSCZ 0.18649 0.13637 1.368 0.1729
## PC1 1.49830 2.04471 0.733 0.4645
## PC2 -6.16962 2.96816 -2.079 0.0389 *
## PC3 -0.94688 2.90089 -0.326 0.7444
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9743 on 206 degrees of freedom
## Multiple R-squared: 0.03535, Adjusted R-squared: 0.01662
## F-statistic: 1.887 on 4 and 206 DF, p-value: 0.1139
summary(lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3639 -0.6747 0.0236 0.6631 2.8146
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06134 0.05241 -1.170 0.2423
## dxAZD 0.01363 0.08117 0.168 0.8667
## PC1 1.40250 1.36819 1.025 0.3057
## PC2 -1.96213 1.08140 -1.814 0.0701 .
## PC3 5.55605 1.12654 4.932 1.05e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9952 on 618 degrees of freedom
## Multiple R-squared: 0.04395, Adjusted R-squared: 0.03777
## F-statistic: 7.103 on 4 and 618 DF, p-value: 1.354e-05
summary(lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")))
##
## Call:
## lm(formula = EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ASDbrain"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.67080 -0.33700 -0.02351 0.40997 1.15930
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6579 0.1652 -3.983 0.000288 ***
## dxASD 0.6056 0.2070 2.926 0.005699 **
## PC1 7.2264 2.2159 3.261 0.002308 **
## PC2 -2.0668 5.7208 -0.361 0.719838
## PC3 -3.9728 4.5651 -0.870 0.389484
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6512 on 39 degrees of freedom
## Multiple R-squared: 0.3976, Adjusted R-squared: 0.3358
## F-statistic: 6.436 on 4 and 39 DF, p-value: 0.0004462
summary(lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")))
##
## Call:
## lm(formula = EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "LIBD"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9466 -0.6697 0.0026 0.6319 3.2555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3707 0.1008 -3.678 0.00030 ***
## dxSCZ 0.3532 0.1430 2.470 0.01431 *
## PC1 2.0413 2.1438 0.952 0.34212
## PC2 -8.7283 3.1120 -2.805 0.00552 **
## PC3 7.2795 3.0415 2.393 0.01759 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.022 on 206 degrees of freedom
## Multiple R-squared: 0.09836, Adjusted R-squared: 0.08086
## F-statistic: 5.618 on 4 and 206 DF, p-value: 0.0002599
summary(lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))
##
## Call:
## lm(formula = EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>%
## filter(study == "ROSMAP"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.80814 -0.64290 0.02276 0.68834 2.28083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.16773 0.04928 3.404 0.000708 ***
## dxAZD 0.09505 0.07632 1.245 0.213480
## PC1 2.68199 1.28642 2.085 0.037493 *
## PC2 -4.99453 1.01676 -4.912 1.15e-06 ***
## PC3 1.96378 1.05921 1.854 0.064215 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9357 on 618 degrees of freedom
## Multiple R-squared: 0.04891, Adjusted R-squared: 0.04275
## F-statistic: 7.945 on 4 and 618 DF, p-value: 3.003e-06
summary(lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur))
##
## Call:
## lm(formula = ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.05213 -0.63461 -0.01263 0.66105 2.54391
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002554 0.044111 -0.058 0.954
## dxASD 0.094980 0.196814 0.483 0.630
## dxSCZ -0.072857 0.115052 -0.633 0.527
## dxAZD -0.103973 0.076414 -1.361 0.174
## PC1 5.171833 0.999518 5.174 2.84e-07 ***
## PC2 1.031131 0.992188 1.039 0.299
## PC3 1.120763 1.005392 1.115 0.265
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9898 on 871 degrees of freedom
## Multiple R-squared: 0.03684, Adjusted R-squared: 0.03021
## F-statistic: 5.553 on 6 and 871 DF, p-value: 1.16e-05
summary(lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur))
##
## Call:
## lm(formula = SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2486 -0.6409 -0.0291 0.6373 2.7723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004881 0.043168 0.113 0.910
## dxASD 0.285279 0.192609 1.481 0.139
## dxSCZ 0.985448 0.112593 8.752 < 2e-16 ***
## dxAZD -0.027260 0.074782 -0.365 0.716
## PC1 4.798916 0.978162 4.906 1.11e-06 ***
## PC2 -2.417490 0.970989 -2.490 0.013 *
## PC3 6.229686 0.983911 6.332 3.88e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9686 on 871 degrees of freedom
## Multiple R-squared: 0.1462, Adjusted R-squared: 0.1403
## F-statistic: 24.86 on 6 and 871 DF, p-value: < 2.2e-16
summary(lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur))
##
## Call:
## lm(formula = AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4786 -0.7476 -0.0937 0.6373 4.0187
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.002046 0.046638 -0.044 0.965
## dxASD 0.281628 0.208089 1.353 0.176
## dxSCZ 0.128455 0.121643 1.056 0.291
## dxAZD 0.577285 0.080792 7.145 1.9e-12 ***
## PC1 0.511076 1.056779 0.484 0.629
## PC2 1.122238 1.049030 1.070 0.285
## PC3 -1.128055 1.062990 -1.061 0.289
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.046 on 871 degrees of freedom
## Multiple R-squared: 0.05737, Adjusted R-squared: 0.05087
## F-statistic: 8.835 on 6 and 871 DF, p-value: 2.271e-09
Should I exclude the asd_exclude group, which are excluded for CTP balance purposes? Or fine for PGS purposes?
No covariates
# No covariates
tidy_df_nocov_asd_asdbrain <- parameters::model_parameters(model = lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_nocov_asd_libd <- parameters::model_parameters(model = lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_nocov_asd_rosmap <- parameters::model_parameters(model = lm(ASD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_nocov_asd <- rbind(tidy_df_nocov_asd_asdbrain, tidy_df_nocov_asd_libd, tidy_df_nocov_asd_rosmap)
tidy_df_nocov_scz_asdbrain <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_nocov_scz_libd <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_nocov_scz_rosmap <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_nocov_scz <- rbind(tidy_df_nocov_scz_asdbrain, tidy_df_nocov_scz_libd, tidy_df_nocov_scz_rosmap)
tidy_df_nocov_azd_asdbrain <- parameters::model_parameters(model = lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_nocov_azd_libd <- parameters::model_parameters(model = lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_nocov_azd_rosmap <- parameters::model_parameters(model = lm(AZD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_nocov_azd <- rbind(tidy_df_nocov_azd_asdbrain, tidy_df_nocov_azd_libd, tidy_df_nocov_azd_rosmap)
tidy_df_nocov_height_asdbrain <- parameters::model_parameters(model = lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_nocov_height_libd <- parameters::model_parameters(model = lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_nocov_height_rosmap <- parameters::model_parameters(model = lm(Height_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_nocov_height <- rbind(tidy_df_nocov_height_asdbrain, tidy_df_nocov_height_libd, tidy_df_nocov_height_rosmap)
tidy_df_nocov_mdd_asdbrain <- parameters::model_parameters(model = lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_nocov_mdd_libd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_nocov_mdd_rosmap <- parameters::model_parameters(model = lm(MDD_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_nocov_mdd <- rbind(tidy_df_nocov_mdd_asdbrain, tidy_df_nocov_mdd_libd, tidy_df_nocov_mdd_rosmap)
tidy_df_nocov_eduyears_asdbrain <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_nocov_eduyears_libd <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_nocov_eduyears_rosmap <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_nocov_eduyears <- rbind(tidy_df_nocov_eduyears_asdbrain, tidy_df_nocov_eduyears_libd, tidy_df_nocov_eduyears_rosmap)
# rbind together
tidy_df_nocov_mega <- rbind(tidy_df_nocov_asd, tidy_df_nocov_scz, tidy_df_nocov_azd, tidy_df_nocov_height, tidy_df_nocov_mdd, tidy_df_nocov_eduyears) %>% filter(term != "(Intercept)")
# Add label
tidy_df_nocov_mega$lab <- ifelse(tidy_df_nocov_mega$p.value < 0.05, paste(
"b=", ifelse(abs(tidy_df_nocov_mega$estimate) < 0.01, formatC(tidy_df_nocov_mega$estimate, format = "e", digits = 1), round(tidy_df_nocov_mega$estimate, 2)), ", ",
"p=", ifelse(tidy_df_nocov_mega$p.value < 0.01, formatC(tidy_df_nocov_mega$p.value, format = "e", digits = 1), round(tidy_df_nocov_mega$p.value, 2)), ", ",
"df=", tidy_df_nocov_mega$df.error, sep = ""), NA)
# Plot
tidy_df_nocov_mega %>%
mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4,1)], name = "") +
xlab("diagnosis") +
ylab("estimate") +
coord_flip() +
theme_bw() + theme(legend.position = "none") +
facet_wrap(~ Trait, ncol = 3)
## Warning: Removed 13 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_nocov.svg", sep = ""), height = 3, width = 7)
## Warning: Removed 13 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_nocov.png", sep = ""), height = 3, width = 7)
## Warning: Removed 13 rows containing missing values (geom_label_repel).
Covariates
# Covariates
#tidy_df_cov_asd <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
#tidy_df_cov_scz <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
#tidy_df_cov_azd <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
#tidy_df_cov_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
# Don't fit jointly: do per study
tidy_df_cov_asd_asdbrain <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_cov_scz_asdbrain <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_cov_azd_asdbrain <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain") %>% filter(!IID %in% asd_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_cov_asd <- rbind(tidy_df_cov_asd_asdbrain, tidy_df_cov_scz_asdbrain, tidy_df_cov_azd_asdbrain)
tidy_df_cov_asd_libd <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_cov_scz_libd <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_cov_azd_libd <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD") %>% filter(!IID %in% scz_exclude$IID)), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_cov_scz <- rbind(tidy_df_cov_asd_libd, tidy_df_cov_scz_libd, tidy_df_cov_azd_libd)
tidy_df_cov_asd_rosmap <- parameters::model_parameters(model = lm(ASD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "ASD_PGS")
tidy_df_cov_scz_rosmap <- parameters::model_parameters(model = lm(SCZ_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "SCZ_PGS")
tidy_df_cov_azd_rosmap <- parameters::model_parameters(model = lm(AZD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "AZD_PGS")
tidy_df_cov_azd <- rbind(tidy_df_cov_asd_rosmap, tidy_df_cov_scz_rosmap, tidy_df_cov_azd_rosmap)
tidy_df_cov_asd_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_cov_scz_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_cov_azd_height <- parameters::model_parameters(model = lm(Height_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "Height_PGS")
tidy_df_cov_height <- rbind(tidy_df_cov_asd_height, tidy_df_cov_scz_height, tidy_df_cov_azd_height)
tidy_df_cov_asd_mdd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_cov_scz_mdd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_cov_azd_mdd <- parameters::model_parameters(model = lm(MDD_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "MDD_PGS")
tidy_df_cov_mdd <- rbind(tidy_df_cov_asd_mdd, tidy_df_cov_scz_mdd, tidy_df_cov_azd_mdd)
tidy_df_cov_asd_eduyears <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ASDbrain")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_cov_scz_eduyears <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "LIBD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_cov_azd_eduyears <- parameters::model_parameters(model = lm(EduYears_PGS ~ dx + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(Trait = "EduYears_PGS")
tidy_df_cov_eduyears <- rbind(tidy_df_cov_asd_eduyears, tidy_df_cov_scz_eduyears, tidy_df_cov_azd_eduyears)
# rbind together
tidy_df_cov_mega <- rbind(tidy_df_cov_asd, tidy_df_cov_scz, tidy_df_cov_azd, tidy_df_cov_height, tidy_df_cov_mdd, tidy_df_cov_eduyears) %>% filter(term %in% c("dxASD", "dxSCZ", "dxAZD"))
# Add label
tidy_df_cov_mega$lab <- ifelse(tidy_df_cov_mega$p.value < 0.05, paste(
"b=", ifelse(abs(tidy_df_cov_mega$estimate < 0.01), formatC(tidy_df_cov_mega$estimate, format = "e", digits = 1), round(tidy_df_cov_mega$estimate, 2)), ", ",
"p=", ifelse(tidy_df_cov_mega$p.value < 0.01, formatC(tidy_df_cov_mega$p.value, format = "e", digits = 1), round(tidy_df_cov_mega$p.value, 2)), ", ",
"df=", tidy_df_cov_mega$df.error, sep = ""), NA)
# Plot
tidy_df_cov_mega %>%
mutate(diagnosis = "diagnosis") %>%
mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
xlab("diagnosis") +
ylab("estimate (adjusting for geno PC1-3)") +
coord_flip() +
theme_bw() + theme(legend.position = "none") +
facet_grid(Trait ~ diagnosis)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1col.svg", sep = ""), height = 10, width = 3)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1col.png", sep = ""), height = 10, width = 3)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
tidy_df_cov_mega %>%
mutate(diagnosis = "diagnosis") %>%
mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
xlab("diagnosis") +
ylab("estimate (adjusting for geno PC1-3)") +
coord_flip() +
theme_bw() + theme(legend.position = "none") +
facet_wrap(~ Trait, nrow = 2)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
# facet_grid(Trait ~ diagnosis)
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_2row.svg", sep = ""), height = 6, width = 6)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_2row.png", sep = ""), height = 6, width = 6)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
# Plot
tidy_df_cov_mega %>%
mutate(diagnosis = "diagnosis") %>%
mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS", "Height_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
xlab("diagnosis") +
ylab("estimate (adjusting for geno PC1-3)") +
coord_flip() +
theme_bw() + theme(legend.position = "none") +
facet_wrap(~ Trait, nrow = 1)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
# facet_grid(Trait ~ diagnosis)
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1row.svg", sep = ""), height = 3, width = 10)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_1row.png", sep = ""), height = 3, width = 10)
## Warning: Removed 12 rows containing missing values (geom_label_repel).
# Plot
tidy_df_cov_mega %>% filter(Trait %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS")) %>%
mutate(diagnosis = "diagnosis") %>%
mutate(MatchTerm = match(term, c("dxASD", "dxSCZ", "dxAZD"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
ggplot(aes(x = fct_rev(term), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(4, name = "Zissou1", type = "continuous")[c(2,3,4)], name = "") +
theme_bw() + theme(legend.position = "none") +
xlab("diagnosis") +
ylab("estimate (adjusting for geno PC1-3)") +
coord_flip() +
facet_grid(Trait ~ diagnosis)
## Warning: Removed 7 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_asd_scz_azd.svg", sep = ""), height = 7, width = 3)
## Warning: Removed 7 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmdx_cov_asd_scz_azd.png", sep = ""), height = 7, width = 3)
## Warning: Removed 7 rows containing missing values (geom_label_repel).
#ctpn <- c("Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP")
ctpn <- c("Exc", "Inh", "Astro", "Endo", "Micro", "Oligo", "OPC")
pgs_pheno_ctp_eur.long <- melt(pgs_pheno_eur.long, measure.vars = all_of(ctpn), variable.name = "celltype", value.name = "CTP")
## Warning in melt(pgs_pheno_eur.long, measure.vars = all_of(ctpn),
## variable.name = "celltype", : The melt generic in data.table has been
## passed a data.frame and will attempt to redirect to the relevant reshape2
## method; please note that reshape2 is deprecated, and this redirection is now
## deprecated as well. To continue using melt methods from reshape2 while both
## libraries are attached, e.g. melt.list, you can prepend the namespace like
## reshape2::melt(pgs_pheno_eur.long). In the next version, this warning will
## become an error.
tidy_df_nocov_asd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")
tidy_df_nocov_scz <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")
tidy_df_nocov_azd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")
tidy_df_nocov_mdd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")
tidy_df_nocov_eduyears <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom")
# rbind together
tidy_df_nocov_mega <- rbind(tidy_df_nocov_asd, tidy_df_nocov_scz, tidy_df_nocov_azd, tidy_df_nocov_mdd, tidy_df_nocov_eduyears) %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)
# Add label
tidy_df_nocov_mega$lab <- ifelse(tidy_df_nocov_mega$p.value < 0.05, paste(
"b=", ifelse(abs(tidy_df_nocov_mega$estimate < 0.01), formatC(tidy_df_nocov_mega$estimate, format = "e", digits = 1), round(tidy_df_nocov_mega$estimate, 2)), ", ",
"p=", ifelse(tidy_df_nocov_mega$p.value < 0.01, formatC(tidy_df_nocov_mega$p.value, format = "e", digits = 1), round(tidy_df_nocov_mega$p.value, 2)), ", ",
"df=", tidy_df_nocov_mega$df.error, sep = ""), NA)
# Change cell-type names
tidy_df_nocov_mega$celltype <- tidy_df_nocov_mega$response
# Plot
tidy_df_nocov_mega %>%
mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
theme_bw() + theme(legend.position = "none") +
xlab("cell-type") +
ylab("estimate (clr-transformed, no covariates)") +
coord_flip() +
facet_wrap(~ term)
## Warning: Removed 18 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_nocov.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 18 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_nocov.png", sep = ""), height = 7, width = 7)
## Warning: Removed 18 rows containing missing values (geom_label_repel).
tidy_df_cov_asd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "ASD_PGS")
tidy_df_cov_scz <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "SCZ_PGS")
tidy_df_cov_azd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "AZD_PGS")
tidy_df_cov_mdd <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "MDD_PGS")
tidy_df_cov_eduyears <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "EduYears_PGS")
# rbind together
tidy_df_cov_mega_all <- rbind(tidy_df_cov_asd, tidy_df_cov_scz, tidy_df_cov_azd, tidy_df_cov_mdd, tidy_df_cov_eduyears)
datatable(tidy_df_cov_mega_all)
tidy_df_cov_mega <- tidy_df_cov_mega_all %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)
# Add label
tidy_df_cov_mega$lab <- ifelse(tidy_df_cov_mega$p.value < 0.05, paste(
"b=", ifelse(abs(tidy_df_cov_mega$estimate < 0.01), formatC(tidy_df_cov_mega$estimate, format = "e", digits = 1), round(tidy_df_cov_mega$estimate, 2)), ", ",
"p=", ifelse(tidy_df_cov_mega$p.value < 0.01, formatC(tidy_df_cov_mega$p.value, format = "e", digits = 1), round(tidy_df_cov_mega$p.value, 2)), ", ",
"df=", tidy_df_cov_mega$df.error, sep = ""), NA)
# Change cell-type names
tidy_df_cov_mega$celltype <- tidy_df_cov_mega$response
# Plot
tidy_df_cov_mega %>%
mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
theme_bw() + theme(legend.position = "none") +
xlab("cell-type") +
ylab("estimate (clr-transformed, adjusted for covariates)") +
coord_flip() +
facet_wrap(~ term)
## Warning: Removed 34 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 34 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov.png", sep = ""), height = 7, width = 7)
## Warning: Removed 34 rows containing missing values (geom_label_repel).
tidy_df_cov_asd_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "ASD_PGS")
tidy_df_cov_scz_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "SCZ_PGS")
tidy_df_cov_azd_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "AZD_PGS")
tidy_df_cov_mdd_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "MDD_PGS")
tidy_df_cov_eduyears_CTL <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "EduYears_PGS")
# rbind together
tidy_df_cov_mega_CTL_all <- rbind(tidy_df_cov_asd_CTL, tidy_df_cov_scz_CTL, tidy_df_cov_azd_CTL, tidy_df_cov_mdd_CTL, tidy_df_cov_eduyears_CTL)
datatable(tidy_df_cov_mega_CTL_all)
tidy_df_cov_mega_CTL <- tidy_df_cov_mega_CTL_all %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)
# Add label
tidy_df_cov_mega_CTL$lab <- ifelse(tidy_df_cov_mega_CTL$p.value < 0.05, paste(
"b=", ifelse(abs(tidy_df_cov_mega_CTL$estimate < 0.01), formatC(tidy_df_cov_mega_CTL$estimate, format = "e", digits = 1), round(tidy_df_cov_mega_CTL$estimate, 2)), ", ",
"p=", ifelse(tidy_df_cov_mega_CTL$p.value < 0.01, formatC(tidy_df_cov_mega_CTL$p.value, format = "e", digits = 1), round(tidy_df_cov_mega_CTL$p.value, 2)), ", ",
"df=", tidy_df_cov_mega_CTL$df.error, sep = ""), NA)
# Change cell-type names
tidy_df_cov_mega_CTL$celltype <- tidy_df_cov_mega_CTL$response
# Plot
tidy_df_cov_mega_CTL %>%
mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGs", "EduYears_PGS"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
theme_bw() + theme(legend.position = "none") +
xlab("cell-type") +
ylab("estimate (clr-transformed, adjusted for covariates) from nonDX") +
coord_flip() +
facet_wrap(~ term)
## Warning: Removed 33 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_CTLonly.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 33 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_CTLonly.png", sep = ""), height = 7, width = 7)
## Warning: Removed 33 rows containing missing values (geom_label_repel).
tidy_df_cov_asd_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ ASD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "ASD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "ASD_PGS")
tidy_df_cov_scz_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ SCZ_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "SCZ")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "SCZ_PGS")
tidy_df_cov_azd_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "AZD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "AZD_PGS")
tidy_df_cov_mdd_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ MDD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "AZD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "MDD_PGS")
tidy_df_cov_eduyears_DX <- parameters::model_parameters(model = lm(cbind(Exc, Inh, Astro, Endo, Micro, Oligo, OPC) ~ EduYears_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "AZD")), ci = 0.95, verbose = FALSE) %>% parameters::standardize_names(style = "broom") %>% mutate(study = "EduYears_PGS")
# rbind together
tidy_df_cov_mega_DX_all <- rbind(tidy_df_cov_asd_DX, tidy_df_cov_scz_DX, tidy_df_cov_azd_DX, tidy_df_cov_mdd_DX, tidy_df_cov_eduyears_DX)
datatable(tidy_df_cov_mega_DX_all)
tidy_df_cov_mega_DX <- tidy_df_cov_mega_DX_all %>% filter(term %in% c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")) %>% mutate(celltype = response)
# Add label
tidy_df_cov_mega_DX$lab <- ifelse(tidy_df_cov_mega_DX$p.value < 0.05, paste(
"b=", ifelse(abs(tidy_df_cov_mega_DX$estimate < 0.01), formatC(tidy_df_cov_mega_DX$estimate, format = "e", digits = 1), round(tidy_df_cov_mega_DX$estimate, 2)), ", ",
"p=", ifelse(tidy_df_cov_mega_DX$p.value < 0.01, formatC(tidy_df_cov_mega_DX$p.value, format = "e", digits = 1), round(tidy_df_cov_mega_DX$p.value, 2)), ", ",
"df=", tidy_df_cov_mega_DX$df.error, sep = ""), NA)
# Change cell-type names
tidy_df_cov_mega_DX$celltype <- tidy_df_cov_mega_DX$response
# Plot
tidy_df_cov_mega_DX %>%
mutate(MatchTerm = match(term, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(term = fct_reorder(term, MatchTerm)) %>%
mutate(MatchCell = match(celltype, all_of(ctpn))) %>%
mutate(celltype = fct_reorder(celltype, MatchCell)) %>%
ggplot(aes(x = fct_rev(celltype), y = estimate, colour = term, label = lab)) +
geom_pointrange(aes(ymin = conf.low, ymax = conf.high)) +
# geom_pointrange(aes(ymin = maxy, ymax = miny), position = position_jitter()) +
geom_hline(aes(yintercept = 0), linetype = "dotted", colour = "navy") +
geom_label_repel(size = 3.5, nudge_x = 0.15, force = 8, colour = "black", min.segment.length = 0) +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
theme_bw() + theme(legend.position = "none") +
xlab("cell-type") +
ylab("estimate (clr-transformed, adjusted for covariates) from DX") +
coord_flip() +
facet_wrap(~ term)
## Warning: Removed 35 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_DXonly.svg", sep = ""), height = 7, width = 7)
## Warning: Removed 35 rows containing missing values (geom_label_repel).
ggsave(paste(out_dir, "/pgs_EUR_lmCTPvPGS_cov_DXonly.png", sep = ""), height = 7, width = 7)
## Warning: Removed 35 rows containing missing values (geom_label_repel).
# No covariates
summary(lm(Endo ~ AZD_PGS, data = pgs_pheno_eur %>% filter(study == "ROSMAP") %>% filter(dx == "CTL")))
##
## Call:
## lm(formula = Endo ~ AZD_PGS, data = pgs_pheno_eur %>% filter(study ==
## "ROSMAP") %>% filter(dx == "CTL"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.308866 -0.058563 -0.002487 0.060605 0.255390
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.876899 0.004703 -186.45 <2e-16 ***
## AZD_PGS -0.007174 0.004848 -1.48 0.14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08924 on 362 degrees of freedom
## Multiple R-squared: 0.006011, Adjusted R-squared: 0.003265
## F-statistic: 2.189 on 1 and 362 DF, p-value: 0.1399
# With covariates
summary(lm(Endo ~ AZD_PGS + age + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP") %>% filter(dx == "CTL")))
##
## Call:
## lm(formula = Endo ~ AZD_PGS + age + sex + batch + PC1 + PC2 +
## PC3, data = pgs_pheno_eur %>% filter(study == "ROSMAP") %>%
## filter(dx == "CTL"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28258 -0.05738 0.00511 0.05844 0.25824
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.693804 0.079594 -8.717 < 2e-16 ***
## AZD_PGS -0.009206 0.004824 -1.908 0.0571 .
## age -0.001892 0.000921 -2.055 0.0406 *
## sexM -0.041800 0.009628 -4.341 1.85e-05 ***
## batchROSMAP1 -0.008749 0.009562 -0.915 0.3608
## PC1 -0.043222 0.163718 -0.264 0.7919
## PC2 -0.047736 0.129421 -0.369 0.7125
## PC3 -0.019359 0.135507 -0.143 0.8865
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08744 on 355 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.06415, Adjusted R-squared: 0.0457
## F-statistic: 3.477 on 7 and 355 DF, p-value: 0.001285
endo_azd_pgs <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
endo_azd_pgs
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 3.3936 3.3936 507.1352 < 2.2e-16 ***
## age2 1 0.0661 0.0661 9.8763 0.001776 **
## sex 1 0.1271 0.1271 18.9904 1.602e-05 ***
## batch 6 0.7313 0.1219 18.2137 < 2.2e-16 ***
## PC1 1 0.0012 0.0012 0.1801 0.671460
## PC2 1 0.0021 0.0021 0.3201 0.571831
## PC3 1 0.0000 0.0000 0.0032 0.954800
## AZD_PGS 1 0.0302 0.0302 4.5191 0.034018 *
## Residuals 489 3.2723 0.0067
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_batchfirst <- summary(aov(Endo ~ batch + sex + age + age2 + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
endo_azd_pgs_batchfirst
## Df Sum Sq Mean Sq F value Pr(>F)
## batch 6 3.9247 0.65411 97.7480 < 2.2e-16 ***
## sex 1 0.1515 0.15150 22.6392 2.577e-06 ***
## age 1 0.2406 0.24062 35.9567 3.924e-09 ***
## age2 1 0.0013 0.00134 0.2001 0.65483
## PC1 1 0.0012 0.00121 0.1801 0.67146
## PC2 1 0.0021 0.00214 0.3201 0.57183
## PC3 1 0.0000 0.00002 0.0032 0.95480
## AZD_PGS 1 0.0302 0.03024 4.5191 0.03402 *
## Residuals 489 3.2723 0.00669
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_ctldx <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + dx + dx:AZD_PGS, data = pgs_pheno_eur))[[1]]
endo_azd_pgs_ctldx
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 7.4353 7.4353 984.4870 < 2.2e-16 ***
## age2 1 0.1482 0.1482 19.6264 1.064e-05 ***
## sex 1 0.0833 0.0833 11.0240 0.0009375 ***
## batch 8 1.4194 0.1774 23.4919 < 2.2e-16 ***
## PC1 1 0.0054 0.0054 0.7215 0.3959043
## PC2 1 0.0022 0.0022 0.2950 0.5871606
## PC3 1 0.0098 0.0098 1.2973 0.2550303
## dx 3 0.1883 0.0628 8.3121 1.877e-05 ***
## dx:AZD_PGS 4 0.0583 0.0146 1.9290 0.1036017
## Residuals 855 6.4574 0.0076
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_addROSMAP <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL" | study == "ROSMAP")))[[1]]
endo_azd_pgs_addROSMAP
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 4.5613 4.5613 572.3515 < 2.2e-16 ***
## age2 1 0.0889 0.0889 11.1597 0.0008774 ***
## sex 1 0.0927 0.0927 11.6297 0.0006839 ***
## batch 6 0.8069 0.1345 16.8753 < 2.2e-16 ***
## PC1 1 0.0019 0.0019 0.2323 0.6299788
## PC2 1 0.0051 0.0051 0.6374 0.4249105
## PC3 1 0.0061 0.0061 0.7707 0.3802758
## AZD_PGS 1 0.1200 0.1200 15.0612 0.0001133 ***
## Residuals 748 5.9611 0.0080
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
endo_azd_pgs_onlyROSMAP <- summary(aov(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(study == "ROSMAP")))[[1]]
endo_azd_pgs_onlyROSMAP
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 0.0500 0.050027 5.6893 0.0173716 *
## age2 1 0.0078 0.007813 0.8885 0.3462521
## sex 1 0.0931 0.093124 10.5904 0.0011993 **
## batch 1 0.0818 0.081811 9.3039 0.0023855 **
## PC1 1 0.0014 0.001417 0.1612 0.6882035
## PC2 1 0.0042 0.004156 0.4726 0.4920314
## PC3 1 0.0066 0.006576 0.7478 0.3875050
## AZD_PGS 1 0.1250 0.125005 14.2160 0.0001788 ***
## Residuals 613 5.3903 0.008793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#astro_scz_pgs <- summary(aov(Astro ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + SCZ_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
#astro_scz_pgs
#astro_scz_pgs_ctldx <- summary(aov(Astro ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + dx + dx:SCZ_PGS, data = pgs_pheno_eur))[[1]]
#astro_scz_pgs_ctldx
oligo_scz_pgs <- summary(aov(Oligo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + SCZ_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL")))[[1]]
oligo_scz_pgs
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 0.0335 0.03346 0.6610 0.41660
## age2 1 1.5943 1.59431 31.4973 3.35e-08 ***
## sex 1 0.0348 0.03478 0.6870 0.40758
## batch 6 8.4344 1.40574 27.7719 < 2.2e-16 ***
## PC1 1 0.0111 0.01108 0.2188 0.64014
## PC2 1 0.0825 0.08253 1.6305 0.20224
## PC3 1 0.0120 0.01200 0.2370 0.62661
## SCZ_PGS 1 0.1387 0.13869 2.7400 0.09851 .
## Residuals 489 24.7519 0.05062
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
oligo_scz_pgs_ctldx <- summary(aov(Oligo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + dx + dx:SCZ_PGS, data = pgs_pheno_eur))[[1]]
oligo_scz_pgs_ctldx
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 0.003 0.00281 0.0575 0.8106
## age2 1 3.081 3.08131 63.0021 6.468e-15 ***
## sex 1 0.024 0.02417 0.4942 0.4822
## batch 8 15.532 1.94155 39.6980 < 2.2e-16 ***
## PC1 1 0.061 0.06060 1.2390 0.2660
## PC2 1 0.080 0.07991 1.6340 0.2015
## PC3 1 0.002 0.00231 0.0472 0.8280
## dx 3 0.267 0.08894 1.8186 0.1422
## dx:SCZ_PGS 4 0.143 0.03586 0.7333 0.5694
## Residuals 855 41.816 0.04891
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check using alternative method
mod0 <- lm(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3, data = pgs_pheno_eur %>% filter(dx == "CTL"))
mod1 <- lm(Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS, data = pgs_pheno_eur %>% filter(dx == "CTL"))
anova(mod0, mod1)
## Analysis of Variance Table
##
## Model 1: Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3
## Model 2: Endo ~ age + age2 + sex + batch + PC1 + PC2 + PC3 + AZD_PGS
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 490 3.3025
## 2 489 3.2723 1 0.030241 4.5191 0.03402 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Summary tables
endo_azd_pgs_sumsq <- endo_azd_pgs$'Sum Sq'[1:(nrow(endo_azd_pgs)-1)]
endo_azd_pgs_sumsq <- endo_azd_pgs$'Sum Sq'
endo_azd_pgs_totalsumsq <- sum(endo_azd_pgs$'Sum Sq')
endo_azd_pgs_sumsq_summary <- data.frame(variable = rownames(endo_azd_pgs), SumSq = endo_azd_pgs_sumsq, var_pc = endo_azd_pgs_sumsq/endo_azd_pgs_totalsumsq*100, Cumulative = cumsum(endo_azd_pgs_sumsq), p = endo_azd_pgs$'Pr(>F)', CTP_PGS = "Endo/AZD_PGS (CTL)")
endo_azd_pgs_sumsq_summary
## variable SumSq var_pc Cumulative p
## 1 age 3.393648e+00 4.451263e+01 3.393648 1.412548e-77
## 2 age2 6.609031e-02 8.668706e-01 3.459738 1.775830e-03
## 3 sex 1.270802e-01 1.666842e+00 3.586818 1.602101e-05
## 4 batch 7.312927e-01 9.591968e+00 4.318111 4.003882e-19
## 5 PC1 1.205303e-03 1.580930e-02 4.319316 6.714599e-01
## 6 PC2 2.141781e-03 2.809257e-02 4.321458 5.718308e-01
## 7 PC3 2.152021e-05 2.822689e-04 4.321480 9.548002e-01
## 8 AZD_PGS 3.024110e-02 3.966560e-01 4.351721 3.401840e-02
## 9 Residuals 3.272291e+00 4.292085e+01 7.624011 NA
## CTP_PGS
## 1 Endo/AZD_PGS (CTL)
## 2 Endo/AZD_PGS (CTL)
## 3 Endo/AZD_PGS (CTL)
## 4 Endo/AZD_PGS (CTL)
## 5 Endo/AZD_PGS (CTL)
## 6 Endo/AZD_PGS (CTL)
## 7 Endo/AZD_PGS (CTL)
## 8 Endo/AZD_PGS (CTL)
## 9 Endo/AZD_PGS (CTL)
#astro_scz_pgs_sumsq <- astro_scz_pgs$'Sum Sq'[1:(nrow(astro_scz_pgs)-1)]
#astro_scz_pgs_sumsq <- astro_scz_pgs$'Sum Sq'
#astro_scz_pgs_totalsumsq <- sum(astro_scz_pgs$'Sum Sq')
#astro_scz_pgs_sumsq_summary <- data.frame(variable = rownames(astro_scz_pgs), SumSq = astro_scz_pgs_sumsq, var_pc = astro_scz_pgs_sumsq/astro_scz_pgs_totalsumsq*100, Cumulative = cumsum(astro_scz_pgs_sumsq), p = astro_scz_pgs$'Pr(>F)', CTP_PGS = "Astro/SCZ_PGS (CTL)")
#astro_scz_pgs_sumsq_summary
oligo_scz_pgs_sumsq <- oligo_scz_pgs$'Sum Sq'[1:(nrow(oligo_scz_pgs)-1)]
oligo_scz_pgs_sumsq <- oligo_scz_pgs$'Sum Sq'
oligo_scz_pgs_totalsumsq <- sum(oligo_scz_pgs$'Sum Sq')
oligo_scz_pgs_sumsq_summary <- data.frame(variable = rownames(oligo_scz_pgs), SumSq = oligo_scz_pgs_sumsq, var_pc = oligo_scz_pgs_sumsq/oligo_scz_pgs_totalsumsq*100, Cumulative = cumsum(oligo_scz_pgs_sumsq), p = oligo_scz_pgs$'Pr(>F)', CTP_PGS = "Oligo/SCZ_PGS (CTL)")
oligo_scz_pgs_sumsq_summary
## variable SumSq var_pc Cumulative p
## 1 age 0.03345882 0.09534283 0.03345882 4.165976e-01
## 2 age2 1.59431182 4.54308395 1.62777063 3.349931e-08
## 3 sex 0.03477507 0.09909358 1.66254570 4.075847e-01
## 4 batch 8.43443640 24.03441554 10.09698210 1.459263e-28
## 5 PC1 0.01107684 0.03156409 10.10805894 6.401376e-01
## 6 PC2 0.08253113 0.23517723 10.19059007 2.022412e-01
## 7 PC3 0.01199582 0.03418278 10.20258589 6.266065e-01
## 8 SCZ_PGS 0.13869187 0.39521051 10.34127775 9.850597e-02
## 9 Residuals 24.75188433 70.53192948 35.09316208 NA
## CTP_PGS
## 1 Oligo/SCZ_PGS (CTL)
## 2 Oligo/SCZ_PGS (CTL)
## 3 Oligo/SCZ_PGS (CTL)
## 4 Oligo/SCZ_PGS (CTL)
## 5 Oligo/SCZ_PGS (CTL)
## 6 Oligo/SCZ_PGS (CTL)
## 7 Oligo/SCZ_PGS (CTL)
## 8 Oligo/SCZ_PGS (CTL)
## 9 Oligo/SCZ_PGS (CTL)
endo_azd_pgs_addROSMAP_sumsq <- endo_azd_pgs_addROSMAP$'Sum Sq'[1:(nrow(endo_azd_pgs_addROSMAP)-1)]
endo_azd_pgs_addROSMAP_sumsq <- endo_azd_pgs_addROSMAP$'Sum Sq'
endo_azd_pgs_addROSMAP_totalsumsq <- sum(endo_azd_pgs_addROSMAP$'Sum Sq')
endo_azd_pgs_addROSMAP_sumsq_summary <- data.frame(variable = rownames(endo_azd_pgs_addROSMAP), SumSq = endo_azd_pgs_addROSMAP_sumsq, var_pc = endo_azd_pgs_addROSMAP_sumsq/endo_azd_pgs_addROSMAP_totalsumsq*100, Cumulative = cumsum(endo_azd_pgs_addROSMAP_sumsq), p = endo_azd_pgs_addROSMAP$'Pr(>F)', CTP_PGS = "Endo/AZD_PGS (CTL+AZD)")
endo_azd_pgs_addROSMAP_sumsq_summary
## variable SumSq var_pc Cumulative p
## 1 age 4.561323380 39.17279366 4.561323 2.222843e-94
## 2 age2 0.088936947 0.76379339 4.650260 8.773950e-04
## 3 sex 0.092682144 0.79595727 4.742942 6.838984e-04
## 4 batch 0.806921161 6.92986520 5.549864 2.497876e-18
## 5 PC1 0.001851148 0.01589772 5.551715 6.299788e-01
## 6 PC2 0.005079652 0.04362422 5.556794 4.249105e-01
## 7 PC3 0.006142224 0.05274962 5.562937 3.802758e-01
## 8 AZD_PGS 0.120029204 1.03081471 5.682966 1.132667e-04
## 9 Residuals 5.961144641 51.19450421 11.644111 NA
## CTP_PGS
## 1 Endo/AZD_PGS (CTL+AZD)
## 2 Endo/AZD_PGS (CTL+AZD)
## 3 Endo/AZD_PGS (CTL+AZD)
## 4 Endo/AZD_PGS (CTL+AZD)
## 5 Endo/AZD_PGS (CTL+AZD)
## 6 Endo/AZD_PGS (CTL+AZD)
## 7 Endo/AZD_PGS (CTL+AZD)
## 8 Endo/AZD_PGS (CTL+AZD)
## 9 Endo/AZD_PGS (CTL+AZD)
endo_azd_pgs_onlyROSMAP_sumsq <- endo_azd_pgs_onlyROSMAP$'Sum Sq'[1:(nrow(endo_azd_pgs_onlyROSMAP)-1)]
endo_azd_pgs_onlyROSMAP_sumsq <- endo_azd_pgs_onlyROSMAP$'Sum Sq'
endo_azd_pgs_onlyROSMAP_totalsumsq <- sum(endo_azd_pgs_onlyROSMAP$'Sum Sq')
endo_azd_pgs_onlyROSMAP_sumsq_summary <- data.frame(variable = rownames(endo_azd_pgs_onlyROSMAP), SumSq = endo_azd_pgs_onlyROSMAP_sumsq, var_pc = endo_azd_pgs_onlyROSMAP_sumsq/endo_azd_pgs_onlyROSMAP_totalsumsq*100, Cumulative = cumsum(endo_azd_pgs_onlyROSMAP_sumsq), p = endo_azd_pgs_onlyROSMAP$'Pr(>F)', CTP_PGS = "Endo/AZD_PGS (ROSMAP)")
endo_azd_pgs_onlyROSMAP_sumsq_summary
## variable SumSq var_pc Cumulative p
## 1 age 0.050027458 0.86850146 0.05002746 0.0173715973
## 2 age2 0.007812928 0.13563629 0.05784039 0.3462521265
## 3 sex 0.093124296 1.61668390 0.15096468 0.0011993044
## 4 batch 0.081811265 1.42028409 0.23277595 0.0023854596
## 5 PC1 0.001417384 0.02460648 0.23419333 0.6882035473
## 6 PC2 0.004156124 0.07215237 0.23834945 0.4920313749
## 7 PC3 0.006575799 0.11415912 0.24492525 0.3875050180
## 8 AZD_PGS 0.125004815 2.17014550 0.36993007 0.0001787543
## 9 Residuals 5.390274230 93.57783077 5.76020430 NA
## CTP_PGS
## 1 Endo/AZD_PGS (ROSMAP)
## 2 Endo/AZD_PGS (ROSMAP)
## 3 Endo/AZD_PGS (ROSMAP)
## 4 Endo/AZD_PGS (ROSMAP)
## 5 Endo/AZD_PGS (ROSMAP)
## 6 Endo/AZD_PGS (ROSMAP)
## 7 Endo/AZD_PGS (ROSMAP)
## 8 Endo/AZD_PGS (ROSMAP)
## 9 Endo/AZD_PGS (ROSMAP)
#ctp_pgs_summary <- rbind(endo_azd_pgs_sumsq_summary, endo_azd_pgs_addROSMAP_sumsq_summary, endo_azd_pgs_onlyROSMAP_sumsq_summary, astro_scz_pgs_sumsq_summary, oligo_scz_pgs_sumsq_summary)
ctp_pgs_summary <- rbind(endo_azd_pgs_sumsq_summary, endo_azd_pgs_addROSMAP_sumsq_summary, endo_azd_pgs_onlyROSMAP_sumsq_summary, oligo_scz_pgs_sumsq_summary)
ctp_pgs_summary %>%
mutate(variable = gsub("[[:blank:]]", "", variable)) %>%
filter(variable != "Residuals") %>%
mutate(variable = gsub("SCZ_PGS", "PGS", variable)) %>%
mutate(variable = gsub("AZD_PGS", "PGS", variable)) %>%
mutate(MatchVariable = match(variable, unique(variable))) %>%
mutate(MatchAnalysis = match(CTP_PGS, unique(CTP_PGS))) %>%
mutate(variable = fct_reorder(variable, rev(MatchVariable))) %>%
mutate(CTP_PGS = fct_reorder(CTP_PGS, rev(MatchAnalysis))) %>%
ggplot(aes(x = CTP_PGS, y = var_pc, fill = variable)) +
geom_bar(position="stack", stat="identity") +
scale_fill_brewer(palette = "Dark2") +
theme_bw() +
ylab("Variance % explained of clr-transformed CTP") + xlab("Association tested") +
coord_flip()
ggsave(paste(out_dir, "/pgs_EUR_anova_varpc_cov_pgssigassoc.svg", sep = ""), height = 4, width = 7)
ggsave(paste(out_dir, "/pgs_EUR_anova_varpc_cov_pgssigassoc.png", sep = ""), height = 4, width = 7)
pgs_pheno_ctp_eur.long$celltypen <- pgs_pheno_ctp_eur.long$celltype
pgs_pheno_ctp_eur.long %>%
filter(Trait != "Height_PGS") %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>%
ggplot(aes(x = PGS, y = CTP)) +
geom_point(aes(colour = Trait)) +
geom_smooth(method = "lm") +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
ylab("CTP (clr-transformed)") +
theme_bw() + theme(legend.position = "none") +
facet_grid(celltypen ~ Trait, scales = "free")
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_nocov_ALL.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_nocov_ALL.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
batch_coef <- pgs_pheno_ctp_eur.long %>% group_by(celltype) %>%
do(model = tidy(lm(CTP ~ batch, data = .))) %>% unnest(model) %>%
mutate(term = gsub("batch", "", term)) %>%
dplyr::rename(batch = term) %>%
dplyr::select(c("celltype", "batch", "estimate"))
# Modify CTP in light of batch adjustment
# - subtraction as it's relative to a given batch (eg. coefficient +1, means that batch has +1 unit higher on average --> need to push back down)
pgs_pheno_ctp_eur.long %>%
filter(Trait != "Height_PGS") %>%
left_join(., batch_coef, by = c("celltype", "batch")) %>%
mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>%
ggplot(aes(x = PGS, y = CTP_batchadjust)) +
geom_point(aes(colour = Trait)) +
geom_smooth(method = "lm") +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
ylab("CTP (clr-transformed, adjusted for batch)") +
theme_bw() + theme(legend.position = "none") +
facet_grid(celltypen ~ Trait, scales = "free")
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_ALL.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_ALL.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
batch_coef <- pgs_pheno_ctp_eur.long %>% group_by(celltype) %>%
do(model = tidy(lm(CTP ~ batch, data = .))) %>% unnest(model) %>%
mutate(term = gsub("batch", "", term)) %>%
dplyr::rename(batch = term) %>%
dplyr::select(c("celltype", "batch", "estimate"))
# Modify CTP in light of batch adjustment
# - subtraction as it's relative to a given batch (eg. coefficient +1, means that batch has +1 unit higher on average --> need to push back down)
pgs_pheno_ctp_eur.long %>% filter(dx == "CTL") %>%
filter(Trait != "Height_PGS") %>%
left_join(., batch_coef, by = c("celltype", "batch")) %>%
mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>%
ggplot(aes(x = PGS, y = CTP_batchadjust)) +
geom_point(aes(colour = Trait)) +
geom_smooth(method = "lm") +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
ylab("CTP (clr-transformed, adjusted for batch, CTL only)") +
theme_bw() + theme(legend.position = "none") +
facet_grid(celltypen ~ Trait, scales = "free")
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_CTLonly.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_CTLonly.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
pgs_pheno_ctp_eur.long %>% filter(dx == "CTL") %>%
filter(Trait != "Height_PGS") %>%
left_join(., batch_coef, by = c("celltype", "batch")) %>%
mutate(CTP_batchadjust = ifelse(is.na(estimate), CTP, CTP - estimate)) %>%
mutate(MatchTrait = match(Trait, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"))) %>%
mutate(Trait = fct_reorder(Trait, MatchTrait)) %>%
mutate(MatchCell = match(celltypen, all_of(ctpn))) %>%
mutate(celltypen = fct_reorder(celltypen, MatchCell)) %>%
ggplot(aes(x = PGS, y = CTP_batchadjust)) +
geom_point(aes(colour = Trait)) +
geom_smooth(method = "lm") +
scale_colour_manual(values = wes_palette(7, name = "Zissou1", type = "continuous")[c(3,4,7,6,1)], name = "") +
ylab("CTP (clr-transformed, adjusted for batch, CTL only)") +
coord_cartesian(ylim=c(-2, 2)) +
theme_bw() + theme(legend.position = "none") +
facet_grid(celltypen ~ Trait)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_neg2to2_CTLonly.svg", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
ggsave(paste(out_dir, "/pgs_EUR_scatterCTPvPGS_adjbatch_neg2to2_CTLonly.png", sep = ""), height = 7, width = 5)
## `geom_smooth()` using formula 'y ~ x'
pgs_pheno_ctp.cor <- pgs_pheno_ctp_eur.long %>% group_by(Trait, celltype) %>% dplyr::summarise(r_pearson = cor.test(PGS, CTP)$estimate, p = cor.test(PGS, CTP)$p.value)
## `summarise()` has grouped output by 'Trait'. You can override using the
## `.groups` argument.
pgs_pheno_ctp.cor %>% dplyr::select(c("Trait", "celltype", "r_pearson")) %>% spread(Trait, r_pearson)
## # A tibble: 7 × 7
## celltype ASD_PGS SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Exc -0.0574 -0.150 -0.0226 0.181 -0.0837 0.120
## 2 Inh -0.0571 -0.124 0.00608 0.119 -0.0314 0.101
## 3 Astro -0.0600 -0.130 -0.0568 0.112 -0.0121 0.125
## 4 Endo 0.0564 0.103 -0.0774 -0.185 0.0893 -0.0808
## 5 Micro 0.0561 0.128 0.00572 -0.180 0.0948 -0.108
## 6 Oligo -0.00271 0.0796 0.00259 -0.00266 -0.0157 0.0184
## 7 OPC 0.0479 0.0755 0.0613 -0.0808 0.0116 -0.117
pgs_pheno_ctp.cor %>% dplyr::select(c("Trait", "celltype", "p")) %>% spread(Trait, p)
## # A tibble: 7 × 7
## celltype ASD_PGS SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Exc 0.0889 0.00000802 0.503 0.0000000651 0.0131 0.000352
## 2 Inh 0.0908 0.000229 0.857 0.000427 0.353 0.00274
## 3 Astro 0.0756 0.000110 0.0926 0.000863 0.719 0.000200
## 4 Endo 0.0951 0.00236 0.0218 0.0000000364 0.00810 0.0166
## 5 Micro 0.0965 0.000147 0.865 0.0000000819 0.00491 0.00137
## 6 Oligo 0.936 0.0183 0.939 0.937 0.643 0.586
## 7 OPC 0.156 0.0253 0.0693 0.0166 0.731 0.000498
#keepn <- c("Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "ASD_PGS", "SCZ_PGS", "AZD_PGS")
keepn <- c(ctpn, "ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")
g <- ggduo(pgs_pheno_eur, ctpn, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"), xlab = "CTPs", ylab = "PGS", title = "Brain CTPs vs Neuropsychiatric PGS")
#ggpairs(pgs_pheno_eur %>% dplyr::select(all_of(keepn)))
pgs_pheno_ctp_CTL.cor <- pgs_pheno_ctp_eur.long %>% filter(dx == "CTL") %>% group_by(Trait, celltype) %>% dplyr::summarise(r_pearson = cor.test(PGS, CTP)$estimate, p = cor.test(PGS, CTP)$p.value)
## `summarise()` has grouped output by 'Trait'. You can override using the
## `.groups` argument.
pgs_pheno_ctp_CTL.cor %>% dplyr::select(c("Trait", "celltype", "r_pearson")) %>% spread(Trait, r_pearson)
## # A tibble: 7 × 7
## celltype ASD_PGS SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Exc -0.0655 -0.114 -0.0973 0.220 -0.0916 0.142
## 2 Inh -0.0557 -0.0452 -0.0770 0.149 -0.0178 0.0944
## 3 Astro -0.0779 -0.123 -0.107 0.159 -0.0470 0.133
## 4 Endo 0.0345 -0.00145 0.0322 -0.224 0.0767 -0.0819
## 5 Micro 0.0568 0.108 0.0910 -0.246 0.112 -0.103
## 6 Oligo 0.0169 0.0908 0.0259 0.0211 -0.0345 0.00593
## 7 OPC 0.0546 0.0441 0.0760 -0.104 0.0333 -0.126
pgs_pheno_ctp_CTL.cor %>% dplyr::select(c("Trait", "celltype", "p")) %>% spread(Trait, p)
## # A tibble: 7 × 7
## celltype ASD_PGS SCZ_PGS AZD_PGS EduYears_PGS MDD_PGS Height_PGS
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Exc 0.142 0.0104 0.0290 0.000000578 0.0398 0.00135
## 2 Inh 0.212 0.311 0.0843 0.000776 0.690 0.0340
## 3 Astro 0.0805 0.00562 0.0161 0.000325 0.292 0.00275
## 4 Endo 0.440 0.974 0.470 0.000000361 0.0855 0.0661
## 5 Micro 0.203 0.0149 0.0412 0.0000000217 0.0118 0.0204
## 6 Oligo 0.705 0.0416 0.562 0.637 0.439 0.894
## 7 OPC 0.221 0.323 0.0884 0.0190 0.456 0.00449
#keepn <- c("Exc", "Inh", "NonN_Astro_FGF3R", "NonN_Micro.Endo_TYROBP", "NonN_Oligo_MBP", "ASD_PGS", "SCZ_PGS", "AZD_PGS")
keepn <- c(ctpn, "ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS")
#ggpairs(pgs_pheno_eur %>% filter(dx == "CTL") %>% dplyr::select(all_of(keepn)))
g <- ggduo(pgs_pheno_eur %>% filter(dx == "CTL"), ctpn, c("ASD_PGS", "SCZ_PGS", "AZD_PGS", "MDD_PGS", "EduYears_PGS"), xlab = "CTPs", ylab = "PGS", title = "Brain CTPs vs Neuropsychiatric PGS")